* Load the replication dataset; the -clear- option discards any data currently in memory.
* NOTE(review): the path is hard-coded under the user's home directory (~/Dropbox/...); adjust it to the local copy of the archive.
use "~/Dropbox/Wikipedia/final-data/Data Archive/Kalla_Aronow_2015.dta", clear

*****************
//Set-up the file
*****************

* Build the derived variables used by every later section, then declare the survival-time structure.

* 0/1 indicator: party label is one of the Democratic-aligned strings listed here.
gen democrat=party=="Democratic" | party=="Democratic, Independent" | party=="Democratic-Farmer-Labor" | party=="Independent (Democratic Socialist)"

* NOTE(review): this equals "years in the Senate" only if lengthofincumbency stores the year the senator took office -- confirm against the codebook.
gen yearsinsenate=2014-lengthofincumbency

* round2 copies round, except that round-5 observations with treatment_post_order > 50 are relabelled 5.5.
* NOTE(review): Stata treats missing as larger than any number, so a round-5 row with missing treatment_post_order would also be coded 5.5 -- confirm there are none.
gen round2 = round
replace round2 = 5.5 if round == 5 & treatment_post_order > 50

* dead = 1 when a removal time was recorded (time_alive non-missing), 0 otherwise.
gen dead = time_alive != .

* Analysis time: rows with no removal time get the placeholder 50e+08; exact zeros are nudged to 0.0001.
* NOTE(review): the zero adjustment is presumably there so that stset does not exclude zero-duration records -- confirm.
gen curr_time = time_alive
replace curr_time =   50e+08 if curr_time == .
replace curr_time = 0.0001 if curr_time == 0

* 2.77778e-7 is 1/3,600,000, i.e. a milliseconds-to-hours conversion -- assumes time_alive is recorded in milliseconds; TODO confirm.
gen hours_alive=curr_time*2.77778e-7

* Linear rescaling of the edit timestamp (shift by 1.72e+12, divide by 3.79e+09).
gen dateentry = (time_edit- 1.72e+12)/ 3.79e+09

* Hour of day of the edit. hh() returns the hour of a %tc datetime; the previous ss() call returned the
* seconds component, which does not match the variable's name or its use as an "hour of entry" control.
* NOTE(review): assumes time_edit is a %tc datetime (milliseconds since 01jan1960) -- confirm. Models that
* adjust for hourentry will give different estimates than they did with the seconds-based version.
gen hourentry = hh(time_edit)

gen logpopulation = log(population)
gen logpagechar = log(pagechar)

* treat_pos = 1 for treat_final 1 or 2; cited = 1 for treat_final 1 or 3 (so treat_final encodes a 2x2 of the two factors).
gen treat_pos = 0
	replace treat_pos = 1 if treat_final == 1 | treat_final == 2
gen cited = 0
	replace cited = 1 if treat_final == 1 | treat_final == 3

* Declare survival data: analysis time in hours, failure event = removal.
stset hours_alive, failure(dead)

*****************
*** Main findings:
*****************

* Headline comparisons of removal counts between arms.
* Each figure is the symmetric percent difference |a - b| / ((a + b)/2) * 100 between the two raw removal counts.
* The counts are taken from r(N) after each -count- instead of being typed in, so the displayed
* percentage always matches the data in memory.
* NOTE(review): the statistic compares raw counts and does not adjust for the number of observations per arm.

*Negative facts are 36% more likely to be removed by Wikipedia editors than positive facts within 12 hours,
count if (treat_final==1 | treat_final==2) & round!=3 & round < 5 & dead==1 & hours_alive<=12
local removed_pos = r(N)
count if (treat_final==3 | treat_final==4) & round!=3 & round < 5 & dead==1 & hours_alive <=12
local removed_neg = r(N)
disp ((abs(`removed_neg' - `removed_pos')/((`removed_neg' + `removed_pos')/2)) * 100)

*and 29% more likely within 3 days
count if (treat_final==1 | treat_final==2) & round!=3 & round < 5 & dead==1 & hours_alive<=72
local removed_pos = r(N)
count if (treat_final==3 | treat_final==4) & round!=3 & round < 5 & dead==1 & hours_alive <=72
local removed_neg = r(N)
disp ((abs(`removed_neg' - `removed_pos')/((`removed_neg' + `removed_pos')/2)) * 100)

*Uncited facts are 43% more likely to be removed by Wikipedia editors than cited facts within 4 hours,
* (treat_final 1 and 3 are the cited arms, 2 and 4 the uncited arms; only rounds 1 and 4 are used)
count if (treat_final==1 | treat_final==3) & (round==1 | round==4) & dead==1 & hours_alive<=4
local removed_cited = r(N)
count if (treat_final==2 | treat_final==4) & (round==1 | round==4) & dead==1 & hours_alive <=4
local removed_uncited = r(N)
disp ((abs(`removed_uncited' - `removed_cited')/((`removed_uncited' + `removed_cited')/2)) * 100)


*****************
//Balance Checks
*****************

* S1-S5 Tables. Covariate balance for Studies 1-5, one study per pass of the loop.
* For each study: covariate means, standard errors of the mean and Ns by treatment arm (tabstat),
* followed by a multinomial logit of treatment assignment on the same covariates.
* Studies 1, 2, 4 and 5 share one covariate list; Study 3 uses democrat, living and lastyearinsenate.
forvalues study = 1/5 {
	local balance_vars democrat class yearsinsenate population pagechar
	if `study' == 3 {
		* Last year in the Senate = final four characters of term, converted to numeric.
		gen lastyearinsenate=substr(term,-4,4)
		destring lastyearinsenate, replace
		local balance_vars democrat living lastyearinsenate
	}
	tabstat `balance_vars' if round==`study', by(treat_final) statistics(mean semean n) format(%12.2f)
	mlogit treat_final `balance_vars' if round==`study'
}


*****************
//Robustness Tables
*****************
* Shared pieces for the pooled models below: the two sample restrictions and the adjustment set.
local valence_sample  round != 3 & round < 5
local citation_sample round == 1 | round == 4
local adjustment influen dateentry hourentry i.round republican i.class i.region lengthofinc logpagechar logpopulation

//S6 Table: Pooled Cox Regression Estimates
*Pooling across studies, negative facts are far more likely to be removed than positive facts by other Wikipedia editors (hazard ratio=0.72, p=0.02)
xi: stcox i.treat_pos i.round if `valence_sample', robust
*Consistent with Wikipedia's policy, uncited but true facts are 43% more likely to be removed than cited facts within four hours (hazard ratio=0.43, p=<0.001)
xi: stcox i.cited i.round if `citation_sample', robust

//S7 Table:  Covariate Adjusted Cox Regression Pooled Estimates
* Same two models with the adjustment covariates added -- check against planning
xi: stcox i.treat_pos `adjustment' if `valence_sample', robust
xi: stcox i.cited `adjustment' if `citation_sample', robust

//S8 Table: Study 3 Cox Regression Estimates for Dead and Retired Senators
* Round 3 only: everyone, then living==1, then living==0
xi: stcox treat_pos if round == 3, robust
xi: stcox treat_pos if round == 3 & living==1, robust
xi: stcox treat_pos if round == 3 & living==0, robust

//S9 Table: Study by Study Cox Regression Estimates, without Covariate Adjustment
* One Cox model per value of round2 (data are sorted by round2 first).
bysort round2: stcox treat_pos cited, robust

//S10 Table: Study by Study Cox Regression Estimates, with Covariate Adjustment
* The covariate list is split so that the timing controls can be left out for round2 == 5.
local lead_terms treat_pos cited influen
local timing_terms dateentry hourentry
local other_terms republican i.class i.region lengthofinc logpagechar logpopulation

* Rounds with round2 below 5: full covariate set, estimated separately by round2.
xi: by round2, sort:  stcox `lead_terms' `timing_terms' `other_terms' if round2<5, robust
* solving optimization problem -- dropping dateentry hourentry
xi: stcox `lead_terms' `other_terms' if round2 == 5, robust
* round2 == 5.5 keeps the full covariate set.
xi: stcox `lead_terms' `timing_terms' `other_terms' if round2 == 5.5, robust


* Positive x cited interaction, estimated on two samples: rounds 1 and 4, and rounds 1 and 4 plus round2 == 5.
local pos_by_cited i.treat_pos*i.cited
local sample_14  (round == 1 | round == 4)
local sample_145 (round == 1 | round == 4 | round2 == 5)
local controls influen dateentry hourentry i.round2 republican i.class i.region lengthofinc logpagechar logpopulation

//S11 Table: Interaction between Positive and Cited from Cox Regressions
* No evidence of interaction
xi: stcox `pos_by_cited' i.round2 if `sample_14', robust
xi: stcox `pos_by_cited' i.round2 if `sample_145', robust

//S12 Table: Interaction between Positive and Cited from Cox Regressions, with Covariate Adjustment
* interaction with covariates
xi: stcox `pos_by_cited' `controls' if `sample_14', robust
xi: stcox `pos_by_cited' `controls' if `sample_145', robust

//S13 Table: Upcoming Elections as Moderator from Cox Regressions
* Upcoming elections: up is 1 when class equals 2 and 0 otherwise
gen up = class == 2

local valence_sample  round!=3 & round < 5
local citation_sample (round == 1 | round == 4)
local controls dateentry hourentry republican i.class i.region lengthofinc logpagechar logpopulation

* does not predict rate of removal in general
xi: stcox up if `valence_sample', robust
* does majorly moderate treatment effects on citedness (strengthens effects), no evidence at p < 0.05 level that it's moderating effects on positive/neg
* Unadjusted interaction models first ...
xi: stcox i.treat_pos*up i.round2 if `valence_sample', robust
xi: stcox i.cited*up i.round2 if `citation_sample', robust
* ... then the same models with covariates added
xi: stcox i.treat_pos*up i.round2 `controls' if `valence_sample', robust
xi: stcox i.cited*up i.round2 `controls' if `citation_sample', robust


//S14 Table: Covariates as Moderators of Positive Edits from Cox Regressions
//S15 Table: Covariates as Moderators of Cited Edits from Cox Regressions
* No strong evidence of moderation, some suggestive evidence that influence and length of incumbency moderate citedness (strengthens effects)
* The loop runs treat_pos first (S14) and cited second (S15). For each factor it interacts the factor with
* every moderator, fits the model on rounds other than 3 and below 5, then on all rounds other than 3,
* and after each fit runs testparm on the xi-generated interaction terms.
* NOTE(review): the first sample for cited includes round 2, unlike the other citation models in this file, which use rounds 1 and 4 -- confirm this is intended.
local moderators republican influen i.region lengthofinc logpagechar logpopulation
foreach factor in treat_pos cited {
	* Assemble the list of factor-by-moderator interaction terms.
	local interactions
	foreach m of local moderators {
		local interactions `interactions' i.`factor'*`m'
	}
	* The interaction variables are matched by the first three letters of the factor name (_ItreX*, _IcitX*).
	local stub = substr("`factor'", 1, 3)

	xi: stcox `interactions' i.round2 if round != 3 & round < 5, robust
	testparm _I`stub'X*
	xi: stcox `interactions' i.round2 if round != 3, robust
	testparm _I`stub'X*
}

//S16 Table: Covariates Predicting Length of Survival from Cox Regressions
* Other traits, not jointly significant in predicting length of survival
* NOTE(review): no joint test is run here; the statement above is presumably read off the model's overall test statistic -- confirm.
local senator_traits republican influen i.region lengthofinc logpagechar logpopulation
xi: stcox `senator_traits' if round != 3, robust

//S17 Table: OLS Estimates on Survival at 6 Hours and 12 Hours
//S18 Table: OLS Estimates on Survival at 24 Hours and 7 Days
* checking robustness with OLS
* Each outcome is 1 if the edit was still alive after the cutoff and 0 if hours_alive is at or below it.
gen hour6  = hours_alive > 6
gen hour12 = hours_alive > 12
gen hour24 = hours_alive > 24
gen day7   = hours_alive > 24*7

* For every cutoff, in order: pooled positive-vs-negative model, pooled cited-vs-uncited model,
* then the round 3 model. hour6 and hour12 make up S17; hour24 and day7 make up S18.
foreach outcome in hour6 hour12 hour24 day7 {
	xi: reg `outcome' i.treat_pos i.round if round != 3 & round < 5, robust
	xi: reg `outcome' i.cited i.round if round == 1 | round == 4, robust
	reg `outcome' treat_pos if round == 3, robust
}

